home *** CD-ROM | disk | FTP | other *** search
- From: talcott!think!massar
- Subject: texchk (part 1 of 2) - syntax checker for LaTeX
- Newsgroups: mod.sources
- Approved: jpn@panda.UUCP
-
- Mod.sources: Volume 3, Issue 63
- Submitted by: talcott!think!massar
-
-
- This is 'texchk', a syntax checker for the LaTeX TeX macro package.
-
- JP Massar
- massar@think.com
- ihnp4!think!massar
-
- ------------------------------------------------------------------------------
- #! /bin/sh
- # This is a shell archive, meaning:
- # 1. Remove everything above the #! /bin/sh line.
- # 2. Save the resulting text in a file.
- # 3. Execute the file with /bin/sh (not csh) to create the files:
- # README
- # Makefile
- # texchk.1
- # cmds.h
- # ctools.h
- # texchars.h
- # texchk.h
- # texchk.c
- # This archive created: Tue Dec 17 09:59:40 1985
- export PATH; PATH=/bin:$PATH
- echo shar: extracting "'README'" '(1687 characters)'
- if test -f 'README'
- then
- echo shar: will not over-write existing file "'README'"
- else
- cat << \SHAR_EOF > 'README'
- Instructions for installing texchk:
-
- Assuming you are running a BSD 4.2 or compatible system:
-
- Edit the file Makefile.
- Change the BIN variable to the directory where you want the
- executable to go.
- Change the MS variable to be the 'x' which makes /usr/man/manx
- acceptable to you.
- Change the CC variable if you wish to compile with other than 'cc'.
- Type 'make install -n' to ensure that you understand what is
- happening.
- Type 'make install'.
- Type 'make man'.
- Type 'texchk -c -v <your-own-latex-file>' and verify that it is
- producing reasonable output.
- Type 'make clean'.
-
- Assuming you are on a non BSD 4.2 compatible system:
-
- You will probably have to change the call to 'rindex' in ctools.c.
- (Compiling with "-Drindex=strrchr" is sufficient - see the Makefile).
-
- If your compiler is brain damaged you may have to extensively
- shorten and revise the names of various variables and preprocessor
- constants.
-
- Take out the -DBSD42 flag from the Makefile. If your system does
- not have <string.h> (Berkeley called it strings.h), you may have
- to edit ctools.c and texchk.c to declare the strxxx functions.
-
- Modulo these changes, I got texchk to run under BRL's SYSTEM V
- emulation package so hopefully it will run on most systems.
-
-
- The files ctools.c and ctools.h are potentially useful in their own
- right. They contain a library of routines I find or found useful. You
- might look through it for functions you will find useful or you could
- install the whole thing as a library as has been done here at TMC.
-
- JP Massar
- massar@think.com
- ihnp4!think!massar
- SHAR_EOF
- if test 1687 -ne "`wc -c < 'README'`"
- then
- echo shar: error transmitting "'README'" '(should have been 1687 characters)'
- fi
- fi
- echo shar: extracting "'Makefile'" '(592 characters)'
- if test -f 'Makefile'
- then
- echo shar: will not over-write existing file "'Makefile'"
- else
- cat << \SHAR_EOF > 'Makefile'
- CC = cc
-
- #use the following DEFINES line for non-BSD systems
- #DEFINES = -Drindex=strrchr
- DEFINES = -DBSD42
-
- #use the following CFLAGS line for debugging
- #CFLAGS = -g $(DEFINES)
- CFLAGS = -O $(DEFINES)
-
- SOURCES = texchk.c cmds.c texchars.c errors.c verbatim.c ctools.c
- OBJECTS = texchk.o cmds.o texchars.o errors.o verbatim.o ctools.o
- LIBS =
- BIN = /usr/local
- MS=1
- MAN = /usr/man/man$(MS)
-
- texchk: $(OBJECTS)
- $(CC) -o texchk $(CFLAGS) $(OBJECTS) $(LIBS)
- install: texchk
- rm -f $(BIN)/texchk
- mv texchk $(BIN)/texchk
- man:
- cp texchk.1 $(MAN)/texchk.$(MS)
- man texchk
- clean:
- rm -f texchk *.o *~
- SHAR_EOF
- if test 592 -ne "`wc -c < 'Makefile'`"
- then
- echo shar: error transmitting "'Makefile'" '(should have been 592 characters)'
- fi
- fi
- echo shar: extracting "'texchk.1'" '(2494 characters)'
- if test -f 'texchk.1'
- then
- echo shar: will not over-write existing file "'texchk.1'"
- else
- cat << \SHAR_EOF > 'texchk.1'
- .TH TEXCHK 1 3/13/85
- .SH NAME
- TEXCHK - syntax checker for LaTeX.
- .SH SYNOPSIS
- .B texchk
- [ -v -c ] [ file1 file2 ... ]
- .SH DESCRIPTION
- By default
- .I texchk
- reads from standard input and outputs error messages and whatnot to
- standard error. If filenames are given as arguments, each file is read
- and parsed in turn.
- .PP
- .I texchk
- checks for the proper nesting of matching '{' '}' and '[' ']' pairs,
- matching '\\begin{<environment>}' '\\end{<environment>}' pairs, and Math Mode
- and Display Math Mode constructs. If improper nesting is detected, an error
- message consisting of a reason, a line number, and the actual line of text
- on which the error occurred is displayed.
- .PP
- In general, the program will halt after finding and printing the first
- error, because attempts at further diagnosis would likely result in a
- stream of spurious error messages.
- .PP
- Unlike LaTeX
- .I texchk
- is fast. It can grovel over a large file in a matter of a few seconds,
- and can be run repeatedly without causing the CPU to beg for mercy.
- .PP
- The command line options are:
- .TP
- .B \-v
- Verbose option. Produces on the standard output a trace of each environment
- .I texchk
- enters and leaves, along with the number of the line it is currently
- processing. This is used basically to assure yourself that
- .I texchk
- is actually doing something, if you give it a huge file to process.
- (And is mildly interesting).
- .TP
- .B \-c
- Check mode. Tells
- .I texchk
- to check each keyword (e.g., \\hspace, \\mbox) in the file against a list of
- known keywords, and issue a warning if the keyword is not in the list.
- Also, each keyword is checked as to whether it is only legal if given
- inside of math mode, and if so and if math mode is not enabled, an error
- message is given (but processing continues).
- .SH FILES
- /usr/src/local/cmd/texchk source directory.
- .br
- /usr/local/texchk executable image.
- .SH AUTHOR
- JP Massar, Thinking Machines Corporation
- .SH BUGS
- .I texchk
- makes no claims to being perfect. It is quite possible that an 'error'
- flagged by
- .I texchk
- is not really an error at all if run through LaTeX.
- .PP
- It does not check for number of arguments, presence of optional arguments,
- etc.
- .PP
- It does not know that certain keywords can only be used in certain modes.
- .PP
- It does not understand that '[' and ']' can be used in regular text
- without necessarily being paired. Also, \\left], for example,
- will confuse it totally.
- .PP
- Although not officially part of LaTeX,
- .I texchk
- understands the '$$' construct of TeX.
- SHAR_EOF
- if test 2494 -ne "`wc -c < 'texchk.1'`"
- then
- echo shar: error transmitting "'texchk.1'" '(should have been 2494 characters)'
- fi
- fi
- echo shar: extracting "'cmds.h'" '(803 characters)'
- if test -f 'cmds.h'
- then
- echo shar: will not over-write existing file "'cmds.h'"
- else
- cat << \SHAR_EOF > 'cmds.h'
- typedef struct {
-
- int cmdlength;
- char *cmd;
- char is_math_mode_only;
- unsigned flags;
-
- } Latex_Command, *Ptr_Latex_Command;
-
-
- extern Latex_Command Command_Table[];
-
- extern char *Math_Environments[];
-
- #define COMMAND_TABLE_SIZE (sizeof(Command_Table)/sizeof(Latex_Command))
-
- #define IS_MATH_MODE(i) (Command_Table[(i)].is_math_mode_only)
-
- #define NOT_FOUND -1
-
- extern int command_lookup ();
-
- /* ARGUMENTS:
- char *command;
- */
-
- /* 'command' is the name of a presumed latex command, including the */
- /* initial backslash. Returns NOT_FOUND if command is not in the command */
- /* table, otherwise the index into the command table of the entry for */
- /* the input command. */
-
- extern int is_math_environment();
-
- /* ARGUMENTS:
- char *keyword;
- */
-
- /* Returns 1 or 0 */
- SHAR_EOF
- if test 803 -ne "`wc -c < 'cmds.h'`"
- then
- echo shar: error transmitting "'cmds.h'" '(should have been 803 characters)'
- fi
- fi
- echo shar: extracting "'ctools.h'" '(10814 characters)'
- if test -f 'ctools.h'
- then
- echo shar: will not over-write existing file "'ctools.h'"
- else
- cat << \SHAR_EOF > 'ctools.h'
- /* -*- Mode: C; Package: (CTOOLS C) -*- */
-
- #ifndef Bool
- #define Bool int
- #endif
-
- #ifndef T
- #define T 1
- #endif
-
- #ifndef F
- #define F 0
- #endif
-
- #ifndef MAXINT
- #define MAXINT 2147483647
- #define MAXINTSTR "2147483647"
- #endif
-
- #ifndef MAXPATHLEN
- #define MAXPATHLEN 80
- #endif
-
- extern char *emalloc();
-
- /* int space; */
- /* space must be greater than 0 */
- /* Causes execution to halt with a 'Fatal error' message if memory */
- /* cannot be allocated, otherwise returns pointer to malloc-ed space */
-
- extern char *anewstr();
-
- /* char *astring; */
- /* emalloc's space and copies astring into that space. Returns pointer */
- /* to new string. */
-
-
- extern int copy();
-
- /* char *dest, *src; int n; */
- /* copies exactly n bytes. */
- /* return value undefined. Use only as procedure. */
-
- extern int fill();
-
- /* char *addr, ch; int n; */
- /* copies ch into n consecutive bytes. */
- /* return value undefined. Use only as procedure. */
-
- extern int to_upper_if_lower();
-
- /* char ch; Returns possibly upper-cased value. */
-
- extern int to_lower_if_upper();
-
- /* char ch; Returns possibly lower-cased value. */
-
- extern int buffconcat();
-
- /* char *buffer, *s1, *s2; */
- /* s1 and s2 must be null terminated. Buffer must be at least */
- /* strlen(s1) + strlen(s2) + 1 characters long. Buffer is null */
- /* terminated upon completion. */
-
- /* return value undefined. Use only as procedure. */
-
- extern int nbuffconcat();
-
- /* char *buffer; int n; char *s1,*s2,*s3,*s4,*s5,*s6; */
- /* all the strings must be null terminated. Buffer must be big enough */
- /* to hold the null terminated result. 0 < n < 7 . */
- /* returns -1 if n is out of range, otherwise 0 */
-
- extern int slcompare();
-
- /* char *s1; int l1; char *s2; int l2 */
- /* does not stop if it encounters a null character. */
- /* returns 0 if equal, -1 if not equal */
-
- extern int slge_compare();
-
- /* char *s1; int l1; char *s2; int l2 */
- /* does not stop if it encounters a null character. */
- /* returns 0 if equal, -1 if s1 < s2, 1 if s1 > s2 */
-
- extern int nocase_compare();
-
- /* char *s1; int l1; char *s2; int l2 */
- /* does not stop if it encounters a null character. */
- /* returns 0 if equal, -1 if s1 < s2, 1 if s1 > s2 case independently. */
-
- extern char * strfind();
-
- /* char *s1; char *s2; int fast; */
- /* finds s2 as a substring of s1. s1 and s2 are null terminated. */
- /* returns 0 if not found, otherwise pointer into s1 to first matching */
- /* character. */
-
- /* WARNING: will access off the end of s1 in the interest of efficiency */
- /* if 'fast' is non-zero. */
-
- extern char * strncfind();
-
- /* char *s1; char *s2; int fast; */
- /* finds s2 as a substring of s1 case independently. s1 and s2 are */
- /* null terminated. */
- /* returns 0 if not found, otherwise pointer into s1 to first matching */
- /* character. */
-
- /* WARNING: will access off the end of s1 in the interest of efficiency */
- /* if 'fast' is non-zero. */
-
- extern char * strsearch();
-
- /* char *s1; int l1; char *s2; int l2 */
- /* finds s2 as a substring of s1. Does not stop if it encounters a null. */
- /* returns pointer into s1, otherwise (char *) 0 if search fails */
- /* case dependent */
-
- extern char * strncsearch();
-
- /* char *s1; int l1; char *s2; int l2 */
- /* finds s2 as a substring of s1. */
- /* returns pointer into s1, otherwise (char *) 0 if search fails */
- /* case independent */
-
- extern int remove_excess_blanks();
-
- /* char *newstring, *oldstring; */
- /* newstring must be long enough to hold the result, which may be as */
- /* long as oldstring. oldstring must be null terminated. */
- /* an excess blank is any blank before the first non-blank character, */
- /* any blank after the last non-blank character, and any blank immediately */
- /* following a blank. */
- /* returns length of newstring */
-
- extern int yes_or_no_check();
-
- /* char *astring; */
- /* returns 1 for yes, 0 for no, -1 for neither. */
- /* astring must be one of "YES", "Y", "NO", "N" in any capitalization. */
-
-
- /* These routines return T if every char satisfies a certain condition. */
- /* These routines all return T if given a null string. */
-
- extern Bool all_digits();
- extern Bool all_whitespace();
- extern Bool all_uppercase();
- extern Bool all_lowercase();
- extern Bool all_alphabetic();
- extern Bool all_alphanumeric();
- extern Bool all_ascii();
-
-
- extern int str_to_pos_int();
-
- /* char *astring; int low,high; */
- /* low must be >= 0. */
- /* returns -1 if *astring is not composed of digits. */
- /* returns -2 if the integer is out of range. */
- /* despite its name, 0 can be returned as a legitimate value. */
- /* treats all digit strings as decimal. */
-
-
- extern int sreverse();
-
- /* char *buffer; char *astring; */
- /* puts the characters of astring in reverse order into buffer. */
- /* buffer must be at least as long as astring + 1. */
- /* buffer is null terminated when done. */
- /* No return value. Use only as procedure. */
-
- extern char *ip_sreverse();
-
- /* char *astring; */
- /* Returns astring with its characters reversed. */
- /* reversal is done in place. */
-
-
-
- #define PATH_MAXPATHLEN 256
-
- char *temp_path();
-
- /*
- char *dir; char *filename;
-
- Returns a pointer to a character string containing the string
- <dir>/<filename>. The pointer points to a buffer which may get
- overwritten if any functions in this package are subsequently called.
- 0 is returned if the pathname would exceed PATH_MAXPATHLEN-1 chars.
- */
-
-
- char *perm_path();
-
- /*
- char *dir; char *filename;
-
- Same as temp_path, except the pathname string is malloc'ed and is thus
- permanent unless specifically freed by the user. Further, no limit
- on the size of the path is made.
- */
-
-
- char *make_path();
-
- /*
- char *dir; char *filename; char *extension; Bool perm;
-
- Creates <dir>/<filename><extension> . The string returned is permanent
- or not depending on 'perm'. If perm is not true, 0 will be returned if
- the resulting path exceeds PATH_MAXPATHLEN-1 chars.
- */
-
-
- char *make_path_numeric_extension();
-
- /*
- char *dir; char *filename; int extension; Bool perm;
-
- Same as make_path except that extension is first converted into a
- string using sprintf.
- */
-
-
- char *just_filename();
-
- /*
- char *path; Bool new; Bool perm;
-
- Given a path of the form /<x>/<y>/<z> returns <z>. If new is not set
- then a pointer into the original input string is returned. If new is
- set a copy is returned, either permanent or not depending on perm.
- */
-
-
- #define ANSWER_NO 0
- #define ANSWER_YES 1
- #define ANSWER_HELP 2
- #define ANSWER_QUIT 3
- #define ANSWER_EOF 4
-
- #define AT_EOF -1
- #define TOO_MANY_CHARS -2
- #define IOERROR -3
- #define TOO_MANY_LINES -4
- #define LINE_TOO_LONG -5
-
- extern read_yes_or_no ();
-
- /* FILE *iport, *oport; char *prompt; char *helpstring; char *quitstring; */
-
- /* prints prompt, then reads from iport until it gets 'Y', 'N', 'YES' or */
- /* 'NO' (case independently). If helpstring and/or quitstring are not */
- /* "" or (char *) 0 then if the user types in one of those ANSWER_HELP */
- /* or ANSWER_QUIT are returned, otherwise ANSWER_NO or ANSWER_YES are */
- /* eventually returned. */
-
-
- extern int getline ();
-
- /* FILE *iport; char *buffer; int buflen; */
-
- /* reads a line into buffer. Does not put the '\n' into buffer. */
- /* Returns AT_EOF if at end of file when called. If it encounters */
- /* end of file after reading at least one character, the eof is treated */
- /* as if it were a newline. Returns TOO_MANY_CHARS if more than */
- /* buflen - 1 characters are read before encountering a newline. */
- /* In this case exactly buflen - 1 characters are read. */
- /* The last character read is always followed by a '\0'. */
- /* if successful getline returns the number of characters read exclusive */
- /* of a terminating newline or eof. */
-
-
- extern int getlines();
-
- /* FILE *fp; int n; char ***ptr_lines; char *linebuf; int maxlinelen; */
- /* See documentation for getfile below */
- /* If called, 'n' must have a value 0. */
-
- extern int getfile();
-
- /* char *filename; char ***ptr_lines; char *linebuf; int maxlinelen; */
-
- /* read in a file as an array of character strings */
- /* 'maxlinelen+1' is the maximum length a line of the file is allowed */
- /* to be. 'linebuf' must be at least 'maxlinelen+1' characters long. */
- /* Returns the number of lines in the file (and therefore the number */
- /* of entries in *ptr_lines) if successful. Returns IOERROR if it */
- /* could not open the file to read from. Returns TOO_MANY_CHARS if */
- /* it encounters a line longer than 'maxlinelen' characters. */
-
- /* Space for each line is malloc'ed as it is read in and the text for */
- /* the jth line is stored in (*ptr_lines)[j] */
-
- /* Only works for fairly small files as it recurses its way through the */
- /* file and does a lot of malloc-ing. Use read_file_into_buffer or */
- /* ngetfile for large files. */
-
- extern int ngetlines();
-
- /* FILE *fp; int n; char ***ptr_lines; char *linebuf; int maxlinelen; */
- /* Same as getlines, except at most 'n' lines will be read. Returns */
- /* TOO_MANY_LINES if more than 'n' lines are present. */
-
- extern int ngetfile();
-
- /* int n; char *filename; char ***ptr_lines; char *linebuf; int maxlinelen; */
- /* See ngetlines above. */
-
- extern int read_file_into_buffer();
-
- /* char *filename;
- char ***ptr_lines;
- int maxlines;
- char *buffer;
- int buflen;
- char *linebuffer;
- int linebuflen;
- */
-
- /* *ptr_lines should be an array of character string pointers maxlines */
- /* big. buffer should be an array of characters buflen long. The routine */
- /* reads lines using getline and stores them into buffer, terminating each */
- /* with a null. A pointer to the nth line read is stored in *ptr_lines[n] */
- /* Returns IOERROR if it cannot open the file for reading, TOO_MANY_LINES */
- /* if more than maxlines were read in, TOO_MANY_CHARS if buffer is */
- /* filled before end of file is reached, and LINE_TOO_LONG if any line is */
- /* longer than linebuflen. Returns number of lines read in if successful. */
-
- extern char *efopen();
-
- /* char *filename; char *mode */
-
- /* Actually returns a (FILE *), so one must cast the return value to this */
- /* type. It doesn't return a (FILE *) explicitly because then to include */
- /* this file one would have to include <stdio.h> explicitly before it. */
- /* The routine simply calls fopen with the same arguments, but prints a */
- /* reasonable error message and calls exit if the call to fopen fails. */
-
-
- Bool check_string();
-
- /* char *str; long minlen; long maxlen; */
-
- /* Returns T if str is not 0 and has a length between minlen and maxlen */
- /* inclusive, otherwise returns F. */
-
-
- #ifndef check_int
- #define check_int(i,minval,maxval) ((i) >= (minval) && (i) <= (maxval))
- #endif
- SHAR_EOF
- if test 10814 -ne "`wc -c < 'ctools.h'`"
- then
- echo shar: error transmitting "'ctools.h'" '(should have been 10814 characters)'
- fi
- fi
- echo shar: extracting "'texchars.h'" '(198 characters)'
- if test -f 'texchars.h'
- then
- echo shar: will not over-write existing file "'texchars.h'"
- else
- cat << \SHAR_EOF > 'texchars.h'
- extern init_legal_chars();
-
- extern char Lgl_Chars[];
- extern char Lgl_Single_Char_Commands[];
-
- #define LGL_CHAR(x) (Lgl_Chars[(x)])
- #define LGL_SINGLE_COMMAND_CHAR(x) (Lgl_Single_Char_Commands[(x)])
- SHAR_EOF
- if test 198 -ne "`wc -c < 'texchars.h'`"
- then
- echo shar: error transmitting "'texchars.h'" '(should have been 198 characters)'
- fi
- fi
- echo shar: extracting "'texchk.h'" '(1489 characters)'
- if test -f 'texchk.h'
- then
- echo shar: will not over-write existing file "'texchk.h'"
- else
- cat << \SHAR_EOF > 'texchk.h'
- /* these are the types of tokens we look for in the input text */
-
- typedef enum {
-
- ESCAPE_BEGIN, ESCAPE_END, ESCAPE_ANY,
- LEFT_SQUARE_BRACKET, RIGHT_SQUARE_BRACKET,
- LEFT_CURLY_BRACKET, RIGHT_CURLY_BRACKET,
- MATH, DOUBLE_MATH, ESCAPE_SINGLE_CHAR
-
- } envtype;
-
- /* some of the input tokens cause the LaTeX environment to be 'pushed' */
- /* following is the definition of the stack on which these tokens are stored */
-
- typedef struct {
- envtype etype;
- char *keyword;
- int linenum;
- } Stack_Entry, *Ptr_Stack_Entry;
-
- #define MAX_ENTRIES 10000
- #define MAX_KEYWORD_LENGTH 100
-
- extern Stack_Entry Lex_Stack[];
- extern int Lex_TOS;
-
- #define Stack_Empty (Lex_TOS < 0)
-
- #define LCB '{'
- #define RCB '}'
- #define LSB '['
- #define RSB ']'
- #define MATH_CHAR '$'
- #define ESCAPE '\\'
- #define COMMENT '%'
-
- #define ENDSTRING "end"
- #define BEGINSTRING "begin"
-
- /* these are the possible actions we can take when we see a token. */
- /* the 'CHECK' action has not been implemented. */
-
- typedef enum {POP,PUSH,CHECK_SINGLE,DOLLAR,DOLLAR_DOLLAR,CHECK} Actions;
-
- /* we keep track of where we are in the text so we can print out nice */
- /* error messages */
-
- extern long Current_Line, Current_Char;
- extern char Line_Buffer[];
- #define MAXLL 1024
-
- extern Indent_Level; /* for verbose pretty print */
-
- extern FILE *fp;
-
- /* this is LaTeX's definition of whitespace */
-
- #define ISWHITE(x) ((x) == ' ' || (x) == '\t')
-
- #define SPACES_PER_INDENT_LEVEL 2
- SHAR_EOF
- if test 1489 -ne "`wc -c < 'texchk.h'`"
- then
- echo shar: error transmitting "'texchk.h'" '(should have been 1489 characters)'
- fi
- fi
- echo shar: extracting "'texchk.c'" '(16712 characters)'
- if test -f 'texchk.c'
- then
- echo shar: will not over-write existing file "'texchk.c'"
- else
- cat << \SHAR_EOF > 'texchk.c'
- /* Texchk -- a LaTeX syntax and spelling checker.
- Written by JP Massar, Thinking Machines Corporation, Cambridge, MA
- This code is hereby released into the public domain, for better or worse.
- */
-
- #include <stdio.h>
- #include <ctype.h>
-
- /* if your system doesn't have either string.h or strings.h you */
- /* may have to declare the string functions yourself */
- #ifdef BSD42
- #include <strings.h>
- #else
- #include <string.h>
- #endif
-
- #ifdef TMC
- #include <ctools.h>
- #else
- #include "ctools.h"
- #endif
-
- #include "texchk.h"
- #include "cmds.h"
- #include "texchars.h"
-
- Bool Verbose_Mode = F; /* -v option: trace math mode/environment entry and exit */
- Bool Check_Mode = F; /* -c option (keyword checking, see texchk.1) */
-
- int Indent_Level = 0; /* for verbose output mode: current nesting depth of the trace */
-
- Stack_Entry Lex_Stack[MAX_ENTRIES]; /* environment stack */
- int Lex_TOS = -1; /* index of the top entry of Lex_Stack; negative when the stack is empty */
-
- FILE *fp; /* file being processed */
- Bool Already_At_Eof = F; /* set when EOF is hit while looking ahead; get_token then returns 0 */
-
- long Current_Line = 0; /* number of the line held in Line_Buffer (first line is 1) */
- long Current_Char = 0; /* index in Line_Buffer of the next character get_a_char returns */
- long Line_Length = 0; /* current line length, counting the '\n' get_a_char appends */
- char Line_Buffer[MAXLL]; /* buffer for input text, one line at a time */
-
- Bool In_Math_Mode = F; /* true while at least one math mode construct is open */
- int Math_Mode_Depth = 0; /* number of currently open math mode constructs */
-
- char Keyword_Buffer[MAX_KEYWORD_LENGTH]; /* static buffer in which keywords are returned */
-
-
- new_file ()
-
- /* Put the scanner state back to its initial values: position in the */
- /* input text, math mode tracking and verbose indentation. */
- /* (Presumably called once per input file; the caller is not visible here.) */
- /* NOTE(review): Already_At_Eof and Lex_TOS are not reset by this routine -- */
- /* confirm that the caller resets them between files. */
-
- {
-     /* math mode tracking */
-     Math_Mode_Depth = 0;
-     In_Math_Mode = F;
-
-     /* position in the input text */
-     Line_Length = 0;
-     Current_Char = 0;
-     Current_Line = 0;
-
-     /* verbose pretty print */
-     Indent_Level = 0;
- }
-
- do_indent (level) int level;
-
- /* Write 'level' indentation steps to stderr, each step being */
- /* SPACES_PER_INDENT_LEVEL blanks. Nothing is written if level <= 0. */
-
- {
-     int remaining;
-     for (remaining = level * SPACES_PER_INDENT_LEVEL; remaining > 0; remaining--) {
-         putc(' ',stderr);
-     }
- }
-
-
- lex_push (etype,keyword,linenum) envtype etype; char *keyword; long linenum;
-
- /* Push a new entry (token type, keyword, line number) onto Lex_Stack. */
- /* Only the keyword pointer is stored; the text itself is not copied. */
- /* Reports a stack overflow and calls texit() if the stack is full. */
-
- {
-     Ptr_Stack_Entry slot;
-
-     Lex_TOS += 1;
-     if (Lex_TOS >= MAX_ENTRIES) {
-         fprintf(stderr,"Stack overflow...Process terminating.\n");
-         texit();
-     }
-     slot = &Lex_Stack[Lex_TOS];
-     slot->linenum = linenum;
-     slot->keyword = keyword;
-     slot->etype = etype;
- }
-
-
- lex_pop (ptr_etype,ptr_keyword,ptr_linenum)
-
- /* Remove the top entry of Lex_Stack and hand its three components back */
- /* through the pointer arguments. */
- /* Reports a stack underflow and calls texit() if the stack is empty. */
-
- envtype *ptr_etype;
- char **ptr_keyword;
- long *ptr_linenum;
-
- {
-     Ptr_Stack_Entry top;
-
-     if (Lex_TOS < 0) {
-         fprintf(stderr,"Stack underflow...Process terminating\n");
-         texit();
-     }
-     top = &Lex_Stack[Lex_TOS];
-     *ptr_etype = top->etype;
-     *ptr_keyword = top->keyword;
-     *ptr_linenum = top->linenum;
-     Lex_TOS -= 1;
- }
-
-
- curstack (ptr_etype,ptr_keyword,ptr_linenum)
-
- /* Peek at the top of Lex_Stack: return the components of the current */
- /* entry through the pointer arguments without removing the entry. */
- /* Calling this with an empty stack is a fatal error. */
-
- envtype *ptr_etype;
- char **ptr_keyword;
- long *ptr_linenum;
-
- {
-     Ptr_Stack_Entry top;
-
-     if (Lex_TOS < 0) {
-         fprintf(stderr,"Fatal error, bad call to curstack\n");
-         texit();
-     }
-     /* read the entry in place; the stack is left exactly as it was */
-     top = &Lex_Stack[Lex_TOS];
-     *ptr_etype = top->etype;
-     *ptr_keyword = top->keyword;
-     *ptr_linenum = top->linenum;
- }
-
-
- char *copy_keyword (starttoken,endtoken) int starttoken,endtoken;
-
- /* Copy Line_Buffer[starttoken] through Line_Buffer[endtoken] (both ends */
- /* included) into Keyword_Buffer, null terminate it and return a pointer */
- /* to Keyword_Buffer. The result is overwritten by the next call. */
- /* A keyword that would not fit, terminator included, is reported with */
- /* keyword_length_error() followed by texit(). */
-
- {
-     int nchars;
-
-     nchars = endtoken - starttoken + 1;
-     if (nchars >= MAX_KEYWORD_LENGTH) {
-         keyword_length_error();
-         texit();
-     }
-     strncpy(Keyword_Buffer,&Line_Buffer[starttoken],nchars);
-     Keyword_Buffer[nchars] = '\0';
-     return(Keyword_Buffer);
- }
-
-
- do_pop (etype,keyword) envtype etype; char *keyword;
-
- /* Pop the top of the environment stack and check that it is the opening */
- /* construct matching the closing construct (etype,keyword) just read. */
- /* For \end the popped entry must be a \begin carrying the same keyword; */
- /* for ']' and '}' it must be the corresponding opening bracket; for the */
- /* math tokens it must be of the same type. */
- /* Returns 0 if the nesting is correct. Otherwise the mismatch is reported */
- /* with nest_error() and texit() is called. */
-
- {
-
-     envtype opened_type;
-     char *opened_keyword;
-     long opened_line;
-     char *opener = 0, *closer = 0;  /* names used in the error message */
-     int matched = 1;                /* token types not listed below always match */
-
-     lex_pop(&opened_type,&opened_keyword,&opened_line);
-
-     if (etype == ESCAPE_END) {
-         opener = "\\begin";
-         closer = "\\end";
-         /* the keywords are only compared when the popped entry is a \begin */
-         matched = (opened_type == ESCAPE_BEGIN)
-                   && (0 == strcmp(opened_keyword,keyword));
-     }
-     else if (etype == RIGHT_SQUARE_BRACKET) {
-         opener = "[";
-         closer = "]";
-         matched = (opened_type == LEFT_SQUARE_BRACKET);
-     }
-     else if (etype == RIGHT_CURLY_BRACKET) {
-         opener = "{";
-         closer = "}";
-         matched = (opened_type == LEFT_CURLY_BRACKET);
-     }
-     else if (etype == MATH) {
-         opener = "Begin Math Mode";
-         closer = "End Math Mode";
-         matched = (opened_type == etype);
-     }
-     else if (etype == DOUBLE_MATH) {
-         opener = "Begin Display Math Mode";
-         closer = "End Display Math Mode";
-         matched = (opened_type == etype);
-     }
-
-     if (matched) return(0);
-
-     nest_error(opener,closer,opened_line,opened_keyword);
-     texit();
-
- }
-
-
- int get_a_char ()
-
- /* Return the next character of the input, or EOF at end of file. */
- /* Input is read one line at a time into Line_Buffer so that the line */
- /* number and the text of the current line are available for error */
- /* messages. A '\n' is put back at the end of every line read, since */
- /* getline (the ctools routine declared in ctools.h) does not store it. */
- /* Characters for which LGL_CHAR is false are reported through */
- /* bad_char_error() but are still returned. */
-
- {
-     int nread;
-     int ch;
-
-     if (Current_Char >= Line_Length) {
-         /* current line used up: fetch the next one. Asking for MAXLL-2 */
-         /* leaves room for the '\n' and '\0' stored below. */
-         nread = getline(fp,Line_Buffer,MAXLL-2);
-         if (nread == AT_EOF) return(EOF);
-         if (nread == TOO_MANY_CHARS) {
-             line_too_long_error();
-             texit();
-         }
-         else {
-             Line_Buffer[nread] = '\n';
-             Line_Buffer[nread + 1] = '\0';
-             Line_Length = nread + 1;
-             Current_Char = 0;
-             Current_Line++;
-         }
-     }
-     /* mask to 8 bits so that the value can be used as a table index */
-     ch = (int) (255 & Line_Buffer[Current_Char]);
-     Current_Char++;
-     if (!LGL_CHAR(ch)) bad_char_error(ch,T);
-     return(ch);
- }
-
-
- unget_a_char ()
-
- /* Step back one character in Line_Buffer so that the next get_a_char() */
- /* returns the same character again. It is a fatal error to call this */
- /* when positioned at the very start of the buffered line. */
-
- {
-     if (0 == Current_Char) {
-         fprintf(stderr,"Invalid unget...process terminating\n");
-         texit();
-     }
-     Current_Char -= 1;
- }
-
-
- char *get_keyword ()
-
- /* Read a keyword, i.e. a run of contiguous alphabetic characters. */
- /* The first letter is the character most recently returned by */
- /* get_a_char(), which is why the token starts at Current_Char - 1. */
- /* The character that ends the keyword is pushed back, unless it is EOF, */
- /* in which case Already_At_Eof is set. */
- /* The keyword is returned in a static buffer (see copy_keyword). */
-
- {
-     int first,last,ch;
-
-     first = last = Current_Char - 1;
-     for (ch = get_a_char(); isalpha(ch); ch = get_a_char()) {
-         last++;
-     }
-     if (ch != EOF) unget_a_char();
-     else Already_At_Eof = 1;
-     return(copy_keyword(first,last));
- }
-
-
- char *get_begin_end_keyword ()
-
- /* called after a \begin or \end construct is found. */
- /* begin and end keywords are enclosed in {}. The next input character */
- /* must be the '{', otherwise it is a fatal error. */
- /* a warning is issued if there is whitespace immediately after the { or */
- /* immediately before the }. */
- /* returns a string constituting what is in between the {}s save for */
- /* whitespace immediately after the { and immediately before the } */
- /* It is a fatal error if nothing but whitespace is found between the {}s, */
- /* or if end of file is reached before the closing }. */
-
- /* keyword returned is in a static buffer. */
-
- /* NOTE(review): if a newline is seen before the '}' only */
- /* warning_close_brace() is called here. Should scanning continue onto the */
- /* next line, Line_Buffer is refilled and starttoken/endtoken no longer */
- /* index the text they were counted on -- confirm how warning_close_brace */
- /* behaves. */
-
- {
-     int ch;
-     int starttoken,endtoken;
-
-     ch = get_a_char();
-     if (ch != LCB) {
-         no_brace_after_begin_end_error();
-         texit();
-     }
-
-     /* starttoken indexes the first character after the '{'; endtoken */
-     /* ends up indexing the last character before the '}' (and is */
-     /* starttoken - 1 if there is nothing in between). */
-
-     starttoken = Current_Char;
-     endtoken = starttoken - 1;
-     while (RCB != (ch = get_a_char())) {
-         if (ch == '\n')
-             warning_close_brace();
-         else if (ch == EOF) {
-             eof_error();
-             texit();
-         }
-         else
-             endtoken++;
-     }
-
-     /* ignore whitespace after '{' and before '}' */
-
-     if (ISWHITE(Line_Buffer[starttoken]) || ISWHITE(Line_Buffer[endtoken])) {
-         warning_blanks_in_cb();
-     }
-
-     /* Skip leading blanks. The comparison is '<=' so that the keyword is */
-     /* considered blank only if every character between the braces is */
-     /* whitespace; a keyword of exactly one character (starttoken equal */
-     /* to endtoken) is legitimate and must not be rejected. */
-
-     while (starttoken <= endtoken && ISWHITE(Line_Buffer[starttoken]))
-         starttoken++;
-     if (starttoken > endtoken) {
-         blank_begin_end_error();
-         texit();
-     }
-     while (endtoken > starttoken && ISWHITE(Line_Buffer[endtoken]))
-         endtoken--;
-     return(copy_keyword(starttoken,endtoken));
-
- }
-
-
- get_token (action,etype,keyword)
-
- /* Get the next significant token from the input stream. Characters that */
- /* are not significant, and comments (from '%' to the end of the line), */
- /* are skipped. The type of the token is stored in *etype and the action */
- /* to perform for it in *action. The significant part of the token is */
- /* returned in *keyword, which points to a string constant or to a static */
- /* buffer. */
-
- /* returns 0 on encountering EOF, otherwise returns 1. */
-
- Actions *action;
- envtype *etype;
- char **keyword;
-
- {
-     int ch,isbegin,isend;
-
-     *keyword = 0;
-     if (Already_At_Eof) return(0);
-
-     for (;;) {
-
-         ch = get_a_char();
-         if (ch == EOF) return(0);
-
-         switch (ch) {
-
-         case LSB :
-             *keyword = "[";
-             *action = PUSH;
-             *etype = LEFT_SQUARE_BRACKET;
-             return(1);
-
-         case RSB :
-             *keyword = "]";
-             *action = POP;
-             *etype = RIGHT_SQUARE_BRACKET;
-             return(1);
-
-         case LCB :
-             *keyword = "{";
-             *action = PUSH;
-             *etype = LEFT_CURLY_BRACKET;
-             return(1);
-
-         case RCB :
-             *keyword = "}";
-             *action = POP;
-             *etype = RIGHT_CURLY_BRACKET;
-             return(1);
-
-         case MATH_CHAR :
-
-             /* Two '$' in a row mean 'Display Math Mode' */
-
-             ch = get_a_char();
-             if (ch == MATH_CHAR) {
-                 *action = DOLLAR_DOLLAR;
-                 *etype = DOUBLE_MATH;
-                 *keyword = "$$";
-                 return(1);
-             }
-
-             /* a single '$': give back the lookahead character, or */
-             /* remember that the end of the input has been seen */
-
-             if (ch == EOF) Already_At_Eof = 1;
-             else unget_a_char();
-             *action = DOLLAR;
-             *etype = MATH;
-             *keyword = "$";
-             return(1);
-
-         case ESCAPE :
-
-             ch = get_a_char();
-             if (ch == EOF) {
-                 eof_error();
-                 texit();
-             }
-
-             /* single character non-alphabetic commands */
-
-             if (!isalpha(ch)) {
-                 Keyword_Buffer[0] = ch;
-                 Keyword_Buffer[1] = '\0';
-                 *keyword = Keyword_Buffer;
-                 *etype = ESCAPE_SINGLE_CHAR;
-                 *action = CHECK_SINGLE;
-                 return(1);
-             }
-
-             /* alphabetic commands; \begin and \end are treated specially */
-             /* in that the keyword returned is the one inside the {} */
-
-             *keyword = get_keyword();
-             isbegin = (0 == strcmp(*keyword,BEGINSTRING));
-             isend = (0 == strcmp(*keyword,ENDSTRING));
-             if (isbegin) {
-                 *etype = ESCAPE_BEGIN;
-                 *action = PUSH;
-             }
-             else if (isend) {
-                 *etype = ESCAPE_END;
-                 *action = POP;
-             }
-             else {
-                 *action = CHECK;
-                 *etype = ESCAPE_ANY;
-                 return(1);
-             }
-             *keyword = get_begin_end_keyword();
-             return(1);
-
-         case COMMENT :
-
-             /* discard everything up to and including the end of the line */
-
-             do {
-                 ch = get_a_char();
-                 if (ch == EOF) return(0);
-             } while (ch != '\n');
-             break;
-
-         default :
-
-             /* not a significant character, keep reading */
-
-             break;
-
-         }
-
-     }
-
- }
-
- push_math_mode (key) char *key;
-
- /* Enter math mode: bump Math_Mode_Depth, set In_Math_Mode and push a MATH */
- /* entry carrying 'key' and the current line number onto the environment */
- /* stack. In verbose mode a trace line is written to stderr first. */
- /* 'key' is stored on the stack by pointer, not copied, so it must stay */
- /* valid for as long as the entry is on the stack. */
-
- {
-     if (Verbose_Mode) {
-         do_indent(Indent_Level++);
-         /* Current_Line is a long, so it has to be printed with %ld */
-         fprintf (
-             stderr,"Line %ld: Entering math mode using <%s>\n",Current_Line,key
-         );
-     }
-     Math_Mode_Depth++;
-     In_Math_Mode = T;
-     lex_push(MATH,key,Current_Line);
- }
-
- pop_math_mode (key) char *key;
-
- /* Leave one level of math mode: decrement Math_Mode_Depth, recompute */
- /* In_Math_Mode (true while the depth is still positive) and pop the top */
- /* entry of the environment stack. The popped entry is discarded; callers */
- /* are expected to have checked beforehand that it is the matching one. */
- /* In verbose mode a trace line is written to stderr first. */
- /* 'key' is only used for the trace message. */
-
- {
-     envtype etype;
-     char *keyword;
-     long linenum;
-     if (Verbose_Mode) {
-         do_indent(--Indent_Level);
-         /* Current_Line is a long, so it has to be printed with %ld */
-         fprintf (
-             stderr,"Line %ld: Leaving math mode using <%s>\n",Current_Line,key
-         );
-     }
-     Math_Mode_Depth--;
-     In_Math_Mode = (Math_Mode_Depth > 0);
-     lex_pop(&etype,&keyword,&linenum);
- }
-
-
- math_mode_action (action,keyword) Actions action; char *keyword;
-
- /* check for math mode tokens, and enter or leave math mode as appropriate */
-
- /* 'action' is the action computed by get_token for the token just read */
- /* and 'keyword' its significant text. Actions not listed below are */
- /* ignored. */
-
- {
-     char *stack_keyword;
-     long linenum;
-     envtype etype;
-     char *key, *matching_keyword;
-
-     switch (action) {
-
-     /* If there is a matching '$' or '$$' as the latest entry on the stack */
-     /* we pop it because it is a matching token. Otherwise, we push it, */
-     /* even if we are already in math mode. */
-
-     case (DOLLAR) :
-     case (DOLLAR_DOLLAR) :
-         key = (action == DOLLAR) ? "$" : "$$";
-         if (!In_Math_Mode) {
-             push_math_mode(key);
-             break;
-         }
-         curstack(&etype,&stack_keyword,&linenum);
-         if (0 != strcmp(key,stack_keyword)) {
-             push_math_mode(key);
-         }
-         else {
-             pop_math_mode(key);
-         }
-         break;
-
-     /* just adjust Math Mode for PUSH and POP, because in process_file */
-     /* we will do the actual pushing and popping of these environments. */
-
-     case (PUSH) :
-         if (is_math_environment(keyword)) {
-             Math_Mode_Depth++;
-             In_Math_Mode = T;
-         }
-         break;
-
-     case (POP) :
-         if (is_math_environment(keyword)) {
-             Math_Mode_Depth--;
-             /* we are still in math mode only while some math construct */
-             /* remains open, exactly as in pop_math_mode. */
-             In_Math_Mode = (Math_Mode_Depth > 0);
-         }
-         break;
-
-     /* look for \( and \[ commands which put us into math mode, and \) and */
-     /* \] commands which pop us out of math mode. Make sure if we are */
-     /* popping that the proper pushed math mode command is the current */
-     /* stack entry. */
-
-     case (CHECK_SINGLE) :
-         if (*keyword == '(' || *keyword == '[') {
-             /* 'keyword' lives in a static buffer that will be reused, so */
-             /* a private copy is what goes onto the stack. */
-             /* NOTE(review): nothing visible here frees that copy when */
-             /* the entry is popped. */
-             push_math_mode(anewstr(keyword));
-         }
-         else if (*keyword == ')' || *keyword == ']') {
-             if (Stack_Empty) {
-                 stack_empty_error(MATH,keyword);
-                 texit();
-             }
-             curstack(&etype,&stack_keyword,&linenum);
-             matching_keyword = (*keyword == ')') ? "(" : "[";
-             if (0 != strcmp(matching_keyword,stack_keyword)) {
-                 nest_error(matching_keyword,keyword,linenum,stack_keyword);
-                 texit();
-             }
-             pop_math_mode(keyword);
-         }
-         break;
-
-     }
-
- }
-
-
- process_file ()
-
- /* Get significant LaTeX forms from the input file. For each one, depending */
- /* on its nature perform a verification or manipulate the environment stack. */
- /* When we are done the stack should be empty. */
-
- /* The file has already been opened using the global file descriptor 'fp' */
-
- /* Returns 0 once get_token reports no more tokens and the stack is empty. */
- /* Structural errors (a close with nothing open, an unexpected action, or */
- /* entries left on the stack at end of input) end the program via texit(). */
-
- {
- Actions action;
- envtype etype;
- char *keyword;
- int cmd_index,ch;
-
- /* get_token returns 0 when there is nothing more to read */
- while (0 != get_token(&action,&etype,&keyword)) {
-
- switch (action) {
-
- case (POP) :
-
- /* \end{keyword}, '}', ']' */
-
- /* a closing token with nothing open is reported and ends the run */
- if (Stack_Empty) {
- stack_empty_error(etype,keyword);
- texit();
- }
-
- /* math mode bookkeeping is adjusted before the entry is popped */
- math_mode_action(POP,keyword);
-
- /* only \end{...} is traced; keywords starting with '}' or ']' are not */
- if (Verbose_Mode && *keyword != '}' && *keyword != ']') {
- do_indent(--Indent_Level);
- printf("line %d: \\end{%s}\n",Current_Line,keyword);
- }
- do_pop(etype,keyword);
- break;
-
- case (PUSH) :
-
- /* \begin{keyword}, '{', '[' */
-
- math_mode_action(PUSH,keyword);
-
- /* only \begin{...} is traced; keywords starting with '{' or '[' are not */
- if (Verbose_Mode && *keyword != '{' && *keyword != '[') {
- do_indent(Indent_Level++);
- printf("line %d: \\begin{%s}\n",Current_Line,keyword);
- }
-
- /* verbatim environments are handed to do_verbatim and are not pushed; */
- /* everything else is pushed with a copy of the keyword made by anewstr */
- if (0==strcmp("verbatim",keyword) || 0==strcmp("verbatim*",keyword)) {
- do_verbatim(keyword);
- break;
- }
- else {
- lex_push(etype,anewstr(keyword),Current_Line);
- break;
- }
-
- case (DOLLAR) :
- math_mode_action(DOLLAR,keyword);
- break;
-
- case (DOLLAR_DOLLAR) :
- math_mode_action(DOLLAR_DOLLAR,keyword);
- break;
-
- case (CHECK_SINGLE) :
-
- /* check for \(, \[, \), \] for math mode */
-
- math_mode_action(CHECK_SINGLE,keyword);
-
- /* the remaining checks are made only when Check_Mode is set */
- if (Check_Mode) {
- if (!LGL_SINGLE_COMMAND_CHAR(*keyword)) {
- single_char_command_error(*keyword);
- }
- /* a single character command missing from the command table is */
- /* treated as an internal inconsistency and exits with status 1 */
- if (NOT_FOUND == (cmd_index = command_lookup(keyword))) {
- fprintf(stderr,"Fatal error:\n");
- fprintf(stderr,"Command Table and Legal Chars out of sync\n");
- exit(1);
- }
- /* a math mode command was used while not in math mode */
- if (!In_Math_Mode && IS_MATH_MODE(cmd_index)) {
- math_keyword_error(keyword);
- }
-
- }
- break;
-
- case (CHECK) :
-
- /* \command token */
-
- /* \verb may be followed by '*'; any other character read here is put */
- /* back before do_verb is called */
- if (0 == strcmp("verb",keyword)) {
- if ('*' != (ch = get_a_char())) unget_a_char();
- do_verb();
- break;
- }
-
- /* in Check_Mode report commands that are not in the command table, */
- /* and math mode commands that are used while not in math mode */
- if (Check_Mode) {
- if (NOT_FOUND == (cmd_index = command_lookup(keyword))) {
- keyword_error(keyword);
- }
- else if (!In_Math_Mode && IS_MATH_MODE(cmd_index)) {
- math_keyword_error(keyword);
- }
- }
-
- break;
-
- default :
- /* texit() exits the program, so control does not continue past it */
- fprintf(stderr,"Invalid return from get_token...\n");
- texit();
-
- }
-
- }
-
- /* entries still on the stack at end of input were never closed */
- if (!Stack_Empty) {
- eof_error();
- texit();
- }
-
- return(0);
-
- }
-
-
- texit ()
-
- /* Abandon the run: close the current input stream 'fp' and terminate */
- /* the program with exit status 1.  This routine does not return.     */
-
- {
-     int failure_status = 1;
-
-     fclose(fp);
-     exit(failure_status);
- }
-
-
- usage ()
-
- /* Complain about a bad command line argument: write a two line message */
- /* to stderr and terminate the program with exit status 1.              */
-
- {
-     /* neither message contains a conversion, so fputs writes the same bytes */
-     fputs("\nUnrecognized argument to texchk\n",stderr);
-     fputs("Usage: texchk [ -v -c ] [ file1 file2 ... ]\n",stderr);
-     exit(1);
- }
-
-
- main (argc,argv) int argc; char **argv;
-
- /* Entry point.  Arguments that begin with '-' are options and must be    */
- /* exactly two characters long: -v sets Verbose_Mode and -c sets          */
- /* Check_Mode.  Any other option calls usage(), which exits.  Every other */
- /* argument is taken as the name of a file to check.                      */
- /*                                                                        */
- /* With no file arguments the standard input is checked and "OK!" is      */
- /* printed when process_file returns.  Otherwise each named file is       */
- /* opened with efopen, checked and closed in turn.  The program exits     */
- /* with status 0 when all input has been processed.                       */
-
- {
-     char **argptr;
-     int j,input_files = 0;
-
-     init_legal_chars();
-
-     /* process command line arguments */
-
-     argptr = argv;
-     while (*++argptr != NULL) {
-         if (**argptr == '-') {
-             if (strlen(*argptr) != 2) {
-                 usage();
-             }
-             switch ((*argptr)[1]) {
-             case 'v' :
-                 Verbose_Mode = T;
-                 break;
-             case 'c' :
-                 Check_Mode = T;
-                 break;
-             default :
-                 usage();
-                 break;
-             }
-             /* Clear the slot so the file loop below skips this option.  */
-             /* The slot holds a pointer, so it is cleared with NULL, not */
-             /* with the character constant '\0'.                         */
-             *argptr = NULL;
-         }
-         else input_files = 1;
-     }
-
-     /* read and process each file */
-
-     if (!input_files) {
-         printf("\n");
-         fp = stdin;
-         process_file();
-         printf("\nOK!\n\n");
-     }
-     else {
-         for (j = 1; j < argc; j++) {
-             /* slots cleared above were options, not file names */
-             if (argv[j] != NULL) {
-                 printf("\nChecking file %s.\n\n",argv[j]);
-                 new_file();
-                 fp = (FILE *) efopen(argv[j],"r");
-                 process_file();
-                 fclose(fp);
-             }
-         }
-     }
-
-     exit(0);
-
- }
-
-
- SHAR_EOF
- if test 16712 -ne "`wc -c < 'texchk.c'`"
- then
- echo shar: error transmitting "'texchk.c'" '(should have been 16712 characters)'
- fi
- fi
- exit 0
- # End of shell archive
-